﻿
************************************************************************************************
* Constructing the data set for "Public Health Policy At Scale..."
************************************************************************************************

* ---- Session settings ------------------------------------------------------
set more off        // do not pause output for --more--
set linesize 255    // output line width of 255 characters
set mem 2g          // memory request kept from the original script

* ---- Folder locations referenced later via $rawStata and $temp2 ------------
global rawStata "X:\Data\Workdata\707116\Stata"
global temp2    "X:\Data\Workdata\707116\ChildVax\02_temp\Nov2019"

**********************************************************************************************
** Main data set - population and variables from medical birth records
*********************************************************************************************

*******************************************************************************************
* population born 1973-1996 in data set LPRMFRLF1996
***************************************************************************************
* Start from an empty workspace and read only the birth-record variables
* that are referenced in this script.
clear

use CPR_BARN CPR_FADER CPR_MODER ///
    ALDER_FADER ALDER_MODER ///
    flerfoldsgraviditet foedselsaar foedselsdato ///
    GESTATIONSALDER_DAGE LAENGDE_BARN LEVENDE_ELLER_DOEDFOEDT ///
    paritet VAEGT_BARN ///
    using $rawStata\mfr2014.dta

* Flag and remove stillbirths
generate dod = cond(LEVENDE_ELLER_DOEDFOEDT == "Dødfødt", 1, .)
drop if dod == 1

* Blank out identifiers that do not contain a run of 12 digits.
* For each ID the helper flag (f_match / m_match / b_match) equals 1 when the
* pattern is NOT found, i.e. it marks the invalid values.
local digits12 "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"
local idvars   CPR_FADER CPR_MODER CPR_BARN
local badflags f_match   m_match   b_match

forvalues i = 1/3 {
    local idvar   : word `i' of `idvars'
    local badflag : word `i' of `badflags'

    generate `badflag' = !regexm(`idvar', "`digits12'")
    replace  `idvar' = "" if `idvar' != "" & `badflag'
}

* Fixed-width (str12) copies of the cleaned identifiers under the names used downstream
generate str12 fpnr_mfr = CPR_FADER
generate str12 mpnr_mfr = CPR_MODER
generate str12 pnr      = CPR_BARN

* The date of birth must be present for every record
assert foedselsdato != .

* Split the date of birth into day / month / year components
generate bday   = day(foedselsdato)
generate bmonth = month(foedselsdato)
* Births on 29 February are moved to 28 February
* (presumably so that the birthdays built below exist in non-leap years)
replace  bday   = 28 if bmonth == 2 & bday == 29
generate byear  = year(foedselsdato)

label variable bday   "day of birth"
label variable bmonth "month of birth"
label variable byear  "year of birth"

* 1st, 5th and 12th birthdays as daily dates
foreach k of numlist 1 5 12 {
    * ordinal suffix for the variable label: 1st, otherwise 5th / 12th
    local sfx = cond(`k' == 1, "st", "th")

    generate bday_`k'yr = mdy(bmonth, bday, byear + `k')
    label variable bday_`k'yr "`k'`sfx' birthday"
    format bday_`k'yr %tdD_m_Y
}

******** gestational age ****************************
****** information only available for cohorts 1978-1996
rename GESTATIONSALDER_DAGE gage_days

* Gestational age in COMPLETED weeks.
* floor() is used rather than round(): the preterm cut-offs below are stated in
* completed weeks (37 weeks = 259 days). With round(), 256-258 days would be
* rounded up to week 37 and those births would be classified as full term;
* likewise values just under 22 weeks would pass the plausibility check.
gen gage_wks = floor(gage_days/7)
tab gage_wks

* implausible gestational age set to missing
replace gage_wks = . if (gage_wks < 22 | gage_wks > 50)

* share of records with missing gestational age, by birth year
g flag = (gage_wks == .)
table byear, c(mean flag)
drop flag

** Preterm according to WHO:
** extreme preterm: <28 weeks/196 d, very preterm: 28 - <32 weeks/196-224 d,
** moderate to late preterm: <37 weeks/259 d

* ltpreterm = 1 if born before 37 completed weeks, 0 otherwise, missing if unknown
gen ltpreterm = 0
replace ltpreterm = 1 if gage_wks < 37
replace ltpreterm = . if gage_wks == .


** Pre-term is before 37 completed weeks (259 days)
* fullterm37 = 1 if born at 37 completed weeks or later, 0 otherwise, missing if unknown
gen fullterm37 = 0
replace fullterm37 = 1 if gage_wks >= 37
replace fullterm37 = . if gage_wks == .

* Birth weight: values of 0 and values above 9000 are treated as missing
rename VAEGT_BARN birthweight
replace birthweight = . if birthweight > 9000 | birthweight == 0

* Diagnostics by birth year: range of birth weight and number of missing values
generate flag = (birthweight == .)
table byear, c(min birthweight max birthweight)
table byear flag
drop flag

* Indicators for low (<2500) and very low (<1500) birth weight;
* both are missing whenever birth weight itself is missing
generate LBW  = (birthweight < 2500) if birthweight != .
generate VLBW = (birthweight < 1500) if birthweight != .

table byear, c(mean LBW mean VLBW)

****** Mother's age at birth ********************
gen mage = ALDER_MODER

sum mage

* Singleton / multiple births:
* multibirth = number of records sharing the same mother ID and date of birth
* (stillbirths were dropped earlier, so only the remaining records are counted).
* Records whose mother ID is empty would all share the key "" and be counted
* together as one spurious multiple birth per date, so multibirth is left
* missing for them instead.
bys mpnr_mfr foedselsdato: g multibirth = _N if mpnr_mfr != ""
tab multibirth, missing

* Children without a valid own ID cannot be linked downstream; drop them only
* after the sibling count above so they still contribute to multibirth
drop if pnr == ""

keep pnr fpnr_mfr mpnr_mfr birthweight LBW VLBW bday bmonth byear bday_1yr bday_5yr bday_12yr ///
gage_wks ltpreterm fullterm* mage multibirth

sort pnr

* Path quoted so that the command keeps working if the folder name contains spaces
save "$temp2\lpr2014.dta", replace

